# load required libraries
library("lme4")
library("ggplot2")
library("here")
library("tidyverse")
library("emmeans")
library("brms")
library("DHARMa")
library("glmmTMB")
library("performance")
library("scales")
A pseudonymized/codified dataset was used as a precaution due to ethical considerations (patient-level clinical data). The full dataset is available upon request (see manuscript data availability statement).
# load the data
df <- read_csv(here("./data/evSCAs_codified.csv"), show_col_types = FALSE) %>%
rename(Experiment = `Experiment (= Sample collection Number)`) %>%
rename(HBB.genotype = `HBB Genotype`) %>%
fill(c(ID, Experiment, Age, Gender, Village, Ethnicity, Symptomatic), .direction = "down") %>%
# mutate(across(c(ID, Experiment, Sample, HBB.genotype, Gender, Village, Ethnicity, Symptomatic), as.factor)) %>%
mutate(across(c(`Date of IFA staining`, `Date of IFA counting`), as.Date)) %>%
select(Sample, Total_parasite_counts, Gametocyte_counts, HBB.genotype, varATS_par_µL_D0,
Age, Gender, Village, Ethnicity, Symptomatic) %>%
mutate(HBB.genotype = case_when(
HBB.genotype == 0 ~ "HbAA",
HBB.genotype == 1 ~ "HbAC",
HBB.genotype == 2 ~ "HbAS",
)) %>%
mutate(across(c(Sample, HBB.genotype, Gender, Village, Ethnicity, Symptomatic), as.factor)) %>%
mutate(HBB.genotype = fct_relevel(HBB.genotype, "HbAA")) # set HbAA as baseline reference
str(df)
## tibble [96 × 10] (S3: tbl_df/tbl/data.frame)
## $ Sample : Factor w/ 32 levels "1","2","3","4",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Total_parasite_counts: num [1:96] 1023 1034 1033 1030 1038 ...
## $ Gametocyte_counts : num [1:96] 40 33 45 49 31 44 17 22 10 54 ...
## $ HBB.genotype : Factor w/ 3 levels "HbAA","HbAC",..: 1 1 1 2 2 2 1 1 1 3 ...
## $ varATS_par_µL_D0 : num [1:96] 6490 6490 6490 3350 3350 ...
## $ Age : num [1:96] 6 6 6 9.6 9.6 9.6 7.4 7.4 7.4 7.3 ...
## $ Gender : Factor w/ 2 levels "0","1": 1 1 1 2 2 2 2 2 2 2 ...
## $ Village : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
## $ Ethnicity : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ Symptomatic : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 1 1 1 ...
summary(df)
## Sample Total_parasite_counts Gametocyte_counts HBB.genotype
## 1 : 3 Min. :1000 Min. : 10.00 HbAA:42
## 2 : 3 1st Qu.:1009 1st Qu.: 27.75 HbAC:36
## 3 : 3 Median :1020 Median : 44.00 HbAS:18
## 4 : 3 Mean :1029 Mean : 70.58
## 5 : 3 3rd Qu.:1039 3rd Qu.: 63.50
## 6 : 3 Max. :1164 Max. :610.00
## (Other):78
## varATS_par_µL_D0 Age Gender Village Ethnicity Symptomatic
## Min. : 54 Min. : 4.900 0:48 1:18 1:78 0:39
## 1st Qu.: 1508 1st Qu.: 6.375 1:48 2:27 2:18 1:57
## Median : 4564 Median : 7.650 3:45
## Mean :10615 Mean : 8.409 4: 6
## 3rd Qu.: 7089 3rd Qu.: 8.150
## Max. :87894 Max. :35.800
##
Box plots and scatter plots of mean conversion rate (averaged across replicates) versus the different categorical/continuous variables.
df_grouped <- df %>%
group_by(Sample) %>%
mutate(mean_prop = mean(Gametocyte_counts/Total_parasite_counts)) %>%
distinct(Sample, .keep_all = TRUE)
boxplot(df_grouped$mean_prop ~ df_grouped$HBB.genotype)
boxplot(df_grouped$mean_prop ~ df_grouped$Village)
boxplot(df_grouped$mean_prop ~ df_grouped$Symptomatic)
boxplot(df_grouped$mean_prop ~ df_grouped$Ethnicity)
boxplot(df_grouped$mean_prop ~ df_grouped$Gender)
plot(df_grouped$mean_prop ~ df_grouped$Age)
ggplot(df_grouped,
aes(x = HBB.genotype, y = Gametocyte_counts/Total_parasite_counts,color = HBB.genotype)) +
geom_boxplot() +
geom_jitter(width = 0.1)
ggplot(df_grouped,
aes(fill = HBB.genotype, x = Gametocyte_counts/Total_parasite_counts)) +
geom_density(alpha=0.4) +
geom_vline(data=df_grouped %>% group_by(HBB.genotype) %>% summarise(genotype.median = median(Gametocyte_counts/Total_parasite_counts)), aes(xintercept=genotype.median, color=HBB.genotype),
linetype="dashed")
More detailed plots of conversion rate:
ggplot(df, aes(x = varATS_par_µL_D0, y = Gametocyte_counts/Total_parasite_counts, color = HBB.genotype)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.4)
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype,
group = Sample)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ HBB.genotype) +
labs(title = "log(varATS) versus log(SCR) (all technical replicates)")
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype,
group = Sample)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ Village) +
labs(title = "log(varATS) versus SCR (all technical replicates) - facet by village")
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype,
group = Sample)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ HBB.genotype*Village) +
labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by village*genotype")
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ Gender) +
labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by gender")
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ Ethnicity) +
labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by ethnicity")
ggplot(df, aes(x = log(varATS_par_µL_D0),
y = log(Gametocyte_counts / Total_parasite_counts),
color = HBB.genotype,
fill = HBB.genotype)) +
geom_jitter(height = 0.05, width = 0, alpha = 0.3) +
stat_summary(fun = mean, geom = "point", size = 3, shape=23, fill="grey", alpha=0.9) +
facet_wrap(~ Symptomatic) +
labs(title = "log(varATS) versus log(SCR) (all technical replicates) - facet by symptomatic")
Hb genotype shows a strong visual association with SCR.
The demographic factors are less pronounced, barring possibly village. However, there are only 2 samples in village 4, both of which have higher than average parasitemia. This effect is probably not relevant in our data, but might be caused by outlying samples. We likely do not have enough representative high parasitemia samples (across different villages) to make any conclusive statements about this association.
Looking at the SCR vs parasitemia plots (grouped by genotype), there does not appear to be a clear association between the two. However, the two highest parasitemia samples (all in HbAS) do show the highest SCR, which might underlies the interaction effect between parasitemia*genotype we observe in some of the models (see model comparison). Because of the high variation in SCR across parasitemia, the stratification of parasitemia by genotype and the fact that we do not have a well balanced representation across different parasitemia values, we opt to be careful here to avoid over-interpreting any significant parasitemia (or interaction) effects.
Because of the small sample size and unbalanced groups, and its effect on model assumptions for more complex models (see model comparison), we therefore focus mainly on a descriptive modelling approach with simple parsimonous models with the single parameter of interest (Hbb genotype), which is backed by the likelihood ratio tests shown in the model comparison section. That being said, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.
To keep things interpretable, and taking into account the fact that some variables are not covered by a lot of samples (e.g. sparse/uneven representation across parasitemia and village), plus the fact that most estimates remain consistent in directionality and magnitude and LRTs favour it, we opt for the most parsimonious model:
A generalized linear mixed model using the beta-binomial family with a random effect to account for the technical replicates and a per-genotype dispersion parameter.
\[SCR \sim HBB.genotype + (1 | Sample)\]
\[\phi \sim HBB.genotype\]
However, as discussed in the model comparison section, this approach cannot fully exclude confounding or interaction effects, and sample size and power limitations should still be kept in mind when interpreting some of these GL(M)M results, with emphasis on effect sizes over exact p-values.
For the Bayesian parameter estimates (which are less liberal than the standard Wald Z estimates), we had to resort to a more simple binomial model (which exhibits some overdispersion), because there were convergence issues with the more complex beta-binomial one.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 840 858 -413 826 89
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1984 0.4454
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5764 0.1254 -28.510 < 2e-16 ***
## HBB.genotypeHbAC 0.5998 0.1832 3.273 0.00106 **
## HBB.genotypeHbAS 2.0997 0.2642 7.948 1.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.3205 0.6509 11.246 < 2e-16 ***
## HBB.genotypeHbAC -0.4812 0.8574 -0.561 0.575
## HBB.genotypeHbAS -4.4575 0.8007 -5.567 2.59e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All model assumptions are met:
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1782, p-value = 0.556
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
The estimated odds ratios (using Wald Z-tests) for the different genotype contrasts are as follows:
emm <- emmeans(model, ~ HBB.genotype)
pairs(emm, adjust="bonferroni", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.82 0.334 Inf 1.17 2.82 1 3.273 0.0032
## HbAS / HbAA 8.16 2.160 Inf 4.34 15.37 1 7.948 <.0001
## HbAS / HbAC 4.48 1.200 Inf 2.36 8.52 1 5.593 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: bonferroni method for 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: bonferroni method for 3 tests
## Tests are performed on the log odds ratio scale
The above table not only shows the estimated odds ratio and its
corresponding p-value, but also the confidence interval of this odds
ratio (= asymp.LCL and asymp.UCL).
These p-values are adjusted for multiple testing using the Bonferroni method.
The estimates are already transformed from the log-scale, so they can be directly interpreted as odds ratios (OR).
The estimated SCR per genotype is, adjusted for multiple testing using the Bonferroni method:
confint(emm, type="response", adjust="bonferroni")
As a more conservative estimate, we can use a Bayesian approach while fitting a regular binomial generalized linear mixed model with the same random effect. The estimated credible intervals for the SRC and odds ratios between genotypes should be less overly optimistic (more conservative).
This binomial model does not fulfil all model assumptions (degree of heteroskedasticity), but the estimates are in line with the beta-binomial model.
# fit a bayesian GLMM
brm_model <- brm(
Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample),
family = binomial,
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: binomial
## Links: mu = logit
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample)
## Data: df (Number of observations: 96)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.63 0.09 0.48 0.83 1.00 2729 5612
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.58 0.17 -3.92 -3.24 1.00 1948 2832
## HBB.genotypeHbAC 0.60 0.25 0.08 1.10 1.00 1907 3186
## HBB.genotypeHbAS 1.93 0.31 1.32 2.53 1.00 2832 4622
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Model assumptions (fit via glmer in order to use DHARMa):
bm_model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
family = binomial, data = df)
summary(bm_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 1276.0 1286.2 -634.0 1268.0 92
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -12.2206 -0.9947 0.0491 0.7415 15.3930
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.3268 0.5717
## Number of obs: 96, groups: Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5841 0.1558 -23.002 < 2e-16 ***
## HBB.genotypeHbAC 0.5962 0.2284 2.610 0.00905 **
## HBB.genotypeHbAS 1.9299 0.2816 6.852 7.26e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) HBB.HAC
## HBB.gntyHAC -0.682
## HBB.gntyHAS -0.553 0.377
# Simulate residuals
sim_res <- simulateResiduals(bm_model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.7091, p-value = 0.13
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
Bayesian odds ratios estimates and credible intervals:
brm_emm <- emmeans(brm_model, ~ HBB.genotype)
pairs(brm_emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio lower.HPD upper.HPD
## HbAC / HbAA 1.82 0.992 2.84
## HbAS / HbAA 6.89 3.383 11.87
## HbAS / HbAC 3.80 1.755 6.52
##
## Point estimate displayed: median
## Results are back-transformed from the log odds ratio scale
## HPD interval probability: 0.95
Bayesian SCR estimates:
confint(brm_emm, type="response")
Briefly:
On adding additional covariates:
Since we have such small sample (and group) sizes, we need to be careful in adding too many covariates to the model, and model assumptions still need to be met for these more complex models. On the other hand, we should also avoid missing any important biological factors or potential confounding factors.
LRTs of complex vs reduced model indicate that the simple models are OK to use. Since these focus on the main biological factor of interest, we will opt to use these and clarify in the text that we are unable to exclude the potential relevance of other factors.
Moreover, the direction and magnitude of our estimates remains similar across the different models.
Decision:
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 1276.0 1286.2 -634.0 1268.0 92
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -12.2206 -0.9947 0.0491 0.7415 15.3930
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.3268 0.5717
## Number of obs: 96, groups: Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5841 0.1558 -23.002 < 2e-16 ***
## HBB.genotypeHbAC 0.5962 0.2284 2.610 0.00905 **
## HBB.genotypeHbAS 1.9299 0.2816 6.852 7.26e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) HBB.HAC
## HBB.gntyHAC -0.682
## HBB.gntyHAS -0.553 0.377
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.7091, p-value = 0.13
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.82 0.415 Inf 1.06 3.10 1 2.610 0.0246
## HbAS / HbAA 6.89 1.940 Inf 3.56 13.33 1 6.852 <.0001
## HbAS / HbAC 3.80 1.090 Inf 1.93 7.45 1 4.631 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 1276.1 1288.9 -633.1 1266.1 91
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -12.2497 -0.9812 0.0147 0.7501 15.3566
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.3066 0.5537
## Number of obs: 96, groups: Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5962 0.1514 -23.758 < 2e-16 ***
## HBB.genotypeHbAC 0.6586 0.2260 2.914 0.00357 **
## HBB.genotypeHbAS 1.8723 0.2761 6.782 1.19e-11 ***
## scale(varATS_par_µL_D0) 0.1443 0.1046 1.379 0.16783
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
## (Intr) HBB.HAC HBB.HAS
## HBB.gntyHAC -0.679
## HBB.gntyHAS -0.537 0.335
## s(ATS__µL_D -0.060 0.200 -0.151
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.64, p-value = 0.162
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.93 0.437 Inf 1.14 3.28 1 2.914 0.0100
## HbAS / HbAA 6.50 1.800 Inf 3.41 12.42 1 6.782 <.0001
## HbAS / HbAC 3.37 0.984 Inf 1.70 6.68 1 4.152 0.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 1276.2 1294.1 -631.1 1262.2 89
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -12.2810 -0.9965 0.0406 0.7505 15.3173
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.2704 0.52
## Number of obs: 96, groups: Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5745 0.1429 -25.007 < 2e-16
## HBB.genotypeHbAC 0.7976 0.2810 2.839 0.00453
## HBB.genotypeHbAS 1.7840 0.2647 6.740 1.58e-11
## scale(varATS_par_µL_D0) -0.0957 0.1571 -0.609 0.54234
## HBB.genotypeHbAC:scale(varATS_par_µL_D0) 0.7020 0.5658 1.241 0.21466
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) 0.3788 0.2038 1.859 0.06309
##
## (Intercept) ***
## HBB.genotypeHbAC **
## HBB.genotypeHbAS ***
## scale(varATS_par_µL_D0)
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
## (Intr) HBB.HAC HBB.HAS s(ATS_ HBBHAC
## HBB.gntyHAC -0.509
## HBB.gntyHAS -0.540 0.275
## s(ATS__µL_D -0.095 0.048 0.051
## HBBHAC:(ATS 0.026 0.629 -0.014 -0.278
## HBBHAS:(ATS 0.073 -0.037 -0.191 -0.771 0.214
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.3675, p-value = 0.374
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 2.22 0.624 Inf 1.15 4.29 1 2.839 0.0126
## HbAS / HbAA 5.95 1.580 Inf 3.20 11.07 1 6.740 <.0001
## HbAS / HbAC 2.68 0.882 Inf 1.24 5.80 1 3.000 0.0076
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Still shows heteroskedasticity…
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 909.8 922.6 -449.9 899.8 91
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.2459 0.4958
## Number of obs: 96, groups: Sample, 32
##
## Dispersion parameter for betabinomial family (): 126
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.4901 0.1572 -22.208 < 2e-16 ***
## HBB.genotypeHbAC 0.5614 0.2230 2.517 0.0118 *
## HBB.genotypeHbAS 1.8728 0.2635 7.108 1.18e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.7026, p-value = 0.1
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.75 0.391 Inf 1.04 2.96 1 2.517 0.0317
## HbAS / HbAA 6.51 1.710 Inf 3.51 12.07 1 7.108 <.0001
## HbAS / HbAC 3.71 0.985 Inf 1.99 6.92 1 4.939 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Adding per-genotype dispersion factor appears to improve model fit.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
# HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
# HBB.genotype + (1 | Sample),
# HBB.genotype + scale(varATS_par_µL_D0) + HBB.genotype*scale(varATS_par_µL_D0) + (1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 840 858 -413 826 89
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1984 0.4454
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5764 0.1254 -28.510 < 2e-16 ***
## HBB.genotypeHbAC 0.5998 0.1832 3.273 0.00106 **
## HBB.genotypeHbAS 2.0997 0.2642 7.948 1.89e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.3205 0.6509 11.246 < 2e-16 ***
## HBB.genotypeHbAC -0.4812 0.8574 -0.561 0.575
## HBB.genotypeHbAS -4.4575 0.8007 -5.567 2.59e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1782, p-value = 0.556
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.82 0.334 Inf 1.19 2.8 1 3.273 0.0031
## HbAS / HbAA 8.16 2.160 Inf 4.40 15.2 1 7.948 <.0001
## HbAS / HbAC 4.48 1.200 Inf 2.39 8.4 1 5.593 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Adding parasitemia leads to strong quantile deviations in the residuals.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
# HBB.genotype (1 | Sample),
HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
# HBB.genotype + (1 | Sample),
# HBB.genotype + scale(varATS_par_µL_D0) + HBB.genotype*scale(varATS_par_µL_D0) + (1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 840.9 861.4 -412.5 824.9 88
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1959 0.4426
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.58461 0.12504 -28.667 < 2e-16 ***
## HBB.genotypeHbAC 0.64247 0.18665 3.442 0.000577 ***
## HBB.genotypeHbAS 2.04853 0.26547 7.716 1.2e-14 ***
## scale(varATS_par_µL_D0) 0.09986 0.09487 1.053 0.292505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.2975 0.6469 11.281 < 2e-16 ***
## HBB.genotypeHbAC -0.4574 0.8538 -0.536 0.592
## HBB.genotypeHbAS -4.3602 0.7887 -5.528 3.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2095, p-value = 0.512
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.90 0.355 Inf 1.23 2.94 1 3.442 0.0017
## HbAS / HbAA 7.76 2.060 Inf 4.16 14.45 1 7.716 <.0001
## HbAS / HbAC 4.08 1.140 Inf 2.12 7.86 1 5.026 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Adding an interaction effect appears to fix the pattern/dependencies in the residuals, but i) leads to some amount of heteroskedasticity and ii) results in fitting warnings (step failure).
# create scaled parasitemia for interpretation in emmeans
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scaled_varATS_par_µL_D0 + (1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df %>% mutate(scaled_varATS_par_µL_D0 = scale(varATS_par_µL_D0))
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype * scaled_varATS_par_µL_D0 + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df %>% mutate(scaled_varATS_par_µL_D0 = scale(varATS_par_µL_D0))
##
## AIC BIC logLik -2*log(L) df.resid
## 840.6 866.3 -410.3 820.6 86
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1659 0.4074
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.56662 0.11631 -30.665 < 2e-16
## HBB.genotypeHbAC 0.79805 0.22593 3.532 0.000412
## HBB.genotypeHbAS 1.96085 0.25804 7.599 2.98e-14
## scaled_varATS_par_µL_D0 -0.08766 0.12853 -0.682 0.495211
## HBB.genotypeHbAC:scaled_varATS_par_µL_D0 0.68526 0.45401 1.509 0.131212
## HBB.genotypeHbAS:scaled_varATS_par_µL_D0 0.32055 0.17911 1.790 0.073516
##
## (Intercept) ***
## HBB.genotypeHbAC ***
## HBB.genotypeHbAS ***
## scaled_varATS_par_µL_D0
## HBB.genotypeHbAC:scaled_varATS_par_µL_D0
## HBB.genotypeHbAS:scaled_varATS_par_µL_D0 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.3117 0.6509 11.233 < 2e-16 ***
## HBB.genotypeHbAC -0.4904 0.8547 -0.574 0.566
## HBB.genotypeHbAS -4.3815 0.7834 -5.593 2.23e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
## Warning in newton(lsp = lsp, X = G$X, y = G$y, Eb = G$Eb, UrS = G$UrS, L = G$L,
## : Fitting terminated with step failure - check results carefully
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0895, p-value = 0.702
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
Since there is an interaction effect here, it is not trivial to just predict the effect/odds ratio for the different genotypes. We can however visualise how the association with parasitemia changes for these different genotype groups (i.e. the slope of the parasitemia effect):
# slope of parasitemia for different genotypes
emtrends(model, pairwise ~ HBB.genotype, var = "scaled_varATS_par_µL_D0")
## $emtrends
## HBB.genotype scaled_varATS_par_µL_D0.trend SE df asymp.LCL asymp.UCL
## HbAA -0.0877 0.129 Inf -0.3396 0.164
## HbAC 0.5976 0.435 Inf -0.2558 1.451
## HbAS 0.2329 0.125 Inf -0.0115 0.477
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## HbAA - HbAC -0.685 0.454 Inf -1.509 0.2865
## HbAA - HbAS -0.321 0.179 Inf -1.790 0.1730
## HbAC - HbAS 0.365 0.453 Inf 0.805 0.6997
##
## P value adjustment: tukey method for comparing a family of 3 estimates
# visualise slopes
emmip(model, HBB.genotype ~ scaled_varATS_par_µL_D0, cov.reduce = range)
## Warning: `aes_()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`
## ℹ The deprecated feature was likely used in the emmeans package.
## Please report the issue at <https://github.com/rvlenth/emmeans/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • linetype : "HBB.genotype"
It appears that the parasitemia association is slightly negative for HbAA and positive for the others. However, none of the slopes are significant. As we saw in the exploratory plots, parasitemia is quite variable and a few high/low samples are enough to create these slope lines.
When we make the contrasts between the genotype groups, we need to choose at which level of parasitemia to do so. The mean value seems to give higher effect sizes than in the genotype-only models (likely because there are a few high outlying parasitemia samples), but the median parasitemia comparison seems to be more in-line with previous models.
# emm <- emmeans(model, ~ HBB.genotype)
# pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
# compare genotypes at average parasitemia
emm <- emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, cov.reduce=mean)
pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## scaled_varATS_par_µL_D0 = -2.62574566762518e-17:
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 2.22 0.502 Inf 1.31 3.77 1 3.532 0.0012
## HbAS / HbAA 7.11 1.830 Inf 3.88 13.01 1 7.599 <.0001
## HbAS / HbAC 3.20 0.963 Inf 1.58 6.48 1 3.862 0.0003
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
# compare genotypes at median parasitemia instead
emm <- emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, cov.reduce=median)
pairs(emm, simple = "HBB.genotype", adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## scaled_varATS_par_µL_D0 = -0.324263549611677:
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.78 0.315 Inf 1.17 2.69 1 3.253 0.0033
## HbAS / HbAA 6.40 1.780 Inf 3.34 12.26 1 6.698 <.0001
## HbAS / HbAC 3.60 0.992 Inf 1.89 6.87 1 4.648 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
# compare at specific value using emmeans(model, ~ HBB.genotype*scaled_varATS_par_µL_D0, at = list(scaled_varATS_par_µL_D0 = 40)), but needs to happen in scaled range
None of the beta binomial models defined above can be fit using the Bayesian approach (“parts of the model have not converged” warning).
An alternative approach using a regular binomial model plus a per-observation random factor (which is an alternative approach to modelling overdispersion) does converge, but at the expense of homogeneity of variance (see next section).
brm_model <- brm(
bf(
Gametocyte_counts | trials(Total_parasite_counts) ~
HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample),
phi ~ HBB.genotype # dispersion (precision) depends on genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 4,
# control = list(adapt_delta = 0.95),
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 6612 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 3378 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 6.69, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 6612 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta_binomial
## Links: mu = logit; phi = log
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample)
## phi ~ HBB.genotype
## Data: df (Number of observations: 96)
## Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 10000
##
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 1.36 1.59 0.07 4.53 2.01 5 7
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept -1.06 7.74 -24.00 8.07 2.40 7
## phi_Intercept 130.21 122.08 15.71 436.19 2.11 6
## HBB.genotypeHbAC 7.02 17.81 -27.09 48.89 2.08 9
## HBB.genotypeHbAS -7.40 21.25 -41.20 27.06 6.69 4
## scalevarATS_par_µL_D0 2.96 2.92 -0.13 7.15 2.29 5
## phi_HBB.genotypeHbAC -38.29 144.47 -292.41 304.66 1.95 10
## phi_HBB.genotypeHbAS 73.33 198.89 -127.45 393.59 5.37 5
## Tail_ESS
## Intercept 17
## phi_Intercept 4
## HBB.genotypeHbAC 21
## HBB.genotypeHbAS 6
## scalevarATS_par_µL_D0 25
## phi_HBB.genotypeHbAC 5
## phi_HBB.genotypeHbAS NA
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
bf(
Gametocyte_counts | trials(Total_parasite_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample),
phi ~ HBB.genotype # dispersion (precision) depends on genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 4,
# control = list(adapt_delta = 0.95),
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 2845 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 1097 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.71, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 2845 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta_binomial
## Links: mu = logit; phi = log
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample)
## phi ~ HBB.genotype
## Data: df (Number of observations: 96)
## Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 10000
##
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.56 0.18 0.35 0.87 1.21 14 27
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI
## Intercept -3.47 1.60 -3.88 -3.23
## phi_Intercept 22.18 23.34 6.43 77.99
## HBB.genotypeHbAC 0.10 9.84 -26.27 15.71
## HBB.genotypeHbAS 1.72 1.65 1.18 2.60
## scalevarATS_par_µL_D0 0.00 0.84 -0.39 1.77
## HBB.genotypeHbAC:scalevarATS_par_µL_D0 -6.57 25.48 -75.75 14.31
## HBB.genotypeHbAS:scalevarATS_par_µL_D0 0.26 0.87 -1.50 0.73
## phi_HBB.genotypeHbAC 19.30 89.23 -37.66 304.80
## phi_HBB.genotypeHbAS -18.40 22.54 -74.66 -3.27
## Rhat Bulk_ESS Tail_ESS
## Intercept 1.27 13 91
## phi_Intercept 1.44 8 17
## HBB.genotypeHbAC 1.57 12 11
## HBB.genotypeHbAS 1.35 11 6
## scalevarATS_par_µL_D0 1.10 25 26
## HBB.genotypeHbAC:scalevarATS_par_µL_D0 2.45 17 12
## HBB.genotypeHbAS:scalevarATS_par_µL_D0 1.15 18 34
## phi_HBB.genotypeHbAC 2.05 5 16
## phi_HBB.genotypeHbAS 1.42 8 26
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
bf(
Gametocyte_counts | trials(Total_parasite_counts) ~
HBB.genotype + (1 | Sample),
phi ~ HBB.genotype # dispersion (precision) depends on genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 4,
# control = list(adapt_delta = 0.95),
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 7274 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 2726 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.34, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 7274 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta_binomial
## Links: mu = logit; phi = log
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample)
## phi ~ HBB.genotype
## Data: df (Number of observations: 96)
## Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 10000
##
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.13 2.06 0.18 6.62 1.83 9 48
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 4.36 13.42 -23.57 31.02 1.37 9
## phi_Intercept 171.54 120.05 57.08 540.69 1.49 8
## HBB.genotypeHbAC -3.32 21.02 -53.13 47.35 1.36 12
## HBB.genotypeHbAS -17.80 41.77 -142.23 37.90 1.43 11
## phi_HBB.genotypeHbAC -70.30 118.10 -354.83 156.40 1.21 17
## phi_HBB.genotypeHbAS 38.60 123.85 -237.69 353.30 1.51 36
## Tail_ESS
## Intercept 34
## phi_Intercept 4
## HBB.genotypeHbAC 29
## HBB.genotypeHbAS 14
## phi_HBB.genotypeHbAC 114
## phi_HBB.genotypeHbAS 52
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
brm_model <- brm(
bf(
Gametocyte_counts | trials(Total_parasite_counts) ~
HBB.genotype + (1 | Sample),
phi ~ HBB.genotype # dispersion (precision) depends on genotype
),
family = beta_binomial(link = "logit"),
data = df,
iter = 5000,
chains = 4,
# control = list(adapt_delta = 0.95),
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
## Warning: There were 7274 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 2726 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: There were 1 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 2.34, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
summary(brm_model)
## Warning: Parts of the model have not converged (some Rhats are > 1.05). Be
## careful when analysing the results! We recommend running more iterations and/or
## setting stronger priors.
## Warning: There were 7274 divergent transitions after warmup. Increasing
## adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Family: beta_binomial
## Links: mu = logit; phi = log
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample)
## phi ~ HBB.genotype
## Data: df (Number of observations: 96)
## Draws: 4 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 10000
##
## Multilevel Hyperparameters:
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 2.13 2.06 0.18 6.62 1.83 9 48
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 4.36 13.42 -23.57 31.02 1.37 9
## phi_Intercept 171.54 120.05 57.08 540.69 1.49 8
## HBB.genotypeHbAC -3.32 21.02 -53.13 47.35 1.36 12
## HBB.genotypeHbAS -17.80 41.77 -142.23 37.90 1.43 11
## phi_HBB.genotypeHbAC -70.30 118.10 -354.83 156.40 1.21 17
## phi_HBB.genotypeHbAS 38.60 123.85 -237.69 353.30 1.51 36
## Tail_ESS
## Intercept 34
## phi_Intercept 4
## HBB.genotypeHbAC 29
## HBB.genotypeHbAS 14
## phi_HBB.genotypeHbAC 114
## phi_HBB.genotypeHbAS 52
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Since the betabinomial models do not converge when using the Bayesian fitting method, we could use an observation-level random effects (OLRE) to account for overdispersion instead:
df$obs <- seq_len(nrow(df))
brm_model <- brm(
Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) + (1 | obs),
family = binomial,
data = df,
iter = 5000,
chains = 8,
seed = 42,
cores = 8
)
## Compiling Stan program...
## Start sampling
summary(brm_model)
## Family: binomial
## Links: mu = logit
## Formula: Gametocyte_counts | trials(Total_parasite_counts) ~ HBB.genotype + (1 | Sample) + (1 | obs)
## Data: df (Number of observations: 96)
## Draws: 8 chains, each with iter = 5000; warmup = 2500; thin = 1;
## total post-warmup draws = 20000
##
## Multilevel Hyperparameters:
## ~obs (Number of levels: 96)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.28 0.03 0.22 0.34 1.00 8732 13649
##
## ~Sample (Number of levels: 32)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept) 0.61 0.09 0.46 0.82 1.00 7581 11682
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -3.59 0.17 -3.94 -3.25 1.00 6059 9312
## HBB.genotypeHbAC 0.60 0.25 0.08 1.10 1.00 6595 8893
## HBB.genotypeHbAS 1.92 0.31 1.30 2.54 1.00 7402 9327
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Assessing model fit for the ORLE requires the model to be fit using lme4 instead of brms.
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample) + (1 | obs),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample) + (1 | obs)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 852.3 865.1 -421.1 842.3 91
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.26424 -0.24057 -0.01068 0.19490 0.91999
##
## Random effects:
## Groups Name Variance Std.Dev.
## obs (Intercept) 0.07229 0.2689
## Sample (Intercept) 0.30183 0.5494
## Number of obs: 96, groups: obs, 96; Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5994 0.1556 -23.126 < 2e-16 ***
## HBB.genotypeHbAC 0.6004 0.2281 2.632 0.0085 **
## HBB.genotypeHbAS 1.9233 0.2813 6.837 8.08e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) HBB.HAC
## HBB.gntyHAC -0.682
## HBB.gntyHAS -0.553 0.377
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.6311, p-value = 0.162
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.82 0.416 Inf 1.07 3.11 1 2.632 0.0231
## HbAS / HbAA 6.84 1.930 Inf 3.54 13.23 1 6.837 <.0001
## HbAS / HbAC 3.75 1.080 Inf 1.91 7.37 1 4.599 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Unfortunately, the OLRE doesn’t seem to fix the overdispersion/per-genotype dispersion differences entirely (i.e. betabinomial with per-genotype dispersion is still preferred).
Even adding parasitemia to the OLRE, the problems remain.
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 852.6 868.0 -420.3 840.6 90
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.26339 -0.23481 -0.00905 0.19339 0.88925
##
## Random effects:
## Groups Name Variance Std.Dev.
## obs (Intercept) 0.07227 0.2688
## Sample (Intercept) 0.28309 0.5321
## Number of obs: 96, groups: obs, 96; Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.6110 0.1515 -23.828 < 2e-16 ***
## HBB.genotypeHbAC 0.6604 0.2263 2.919 0.00351 **
## HBB.genotypeHbAS 1.8680 0.2764 6.758 1.4e-11 ***
## scale(varATS_par_µL_D0) 0.1387 0.1047 1.324 0.18535
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
## (Intr) HBB.HAC HBB.HAS
## HBB.gntyHAC -0.679
## HBB.gntyHAS -0.537 0.336
## s(ATS__µL_D -0.060 0.200 -0.151
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.5688, p-value = 0.192
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.94 0.438 Inf 1.14 3.29 1 2.919 0.0098
## HbAS / HbAA 6.48 1.790 Inf 3.39 12.38 1 6.758 <.0001
## HbAS / HbAC 3.35 0.979 Inf 1.68 6.64 1 4.127 0.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs),
family = binomial, data = df)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype * scale(varATS_par_µL_D0) + (1 | Sample) + (1 | obs)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 852.8 873.3 -418.4 836.8 88
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.27715 -0.24668 -0.01384 0.18366 0.85689
##
## Random effects:
## Groups Name Variance Std.Dev.
## obs (Intercept) 0.07219 0.2687
## Sample (Intercept) 0.24851 0.4985
## Number of obs: 96, groups: obs, 96; Sample, 32
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.58993 0.14352 -25.014 < 2e-16
## HBB.genotypeHbAC 0.80315 0.28208 2.847 0.00441
## HBB.genotypeHbAS 1.78272 0.26576 6.708 1.97e-11
## scale(varATS_par_µL_D0) -0.09486 0.15767 -0.602 0.54741
## HBB.genotypeHbAC:scale(varATS_par_µL_D0) 0.70494 0.56801 1.241 0.21458
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) 0.36739 0.20462 1.796 0.07257
##
## (Intercept) ***
## HBB.genotypeHbAC **
## HBB.genotypeHbAS ***
## scale(varATS_par_µL_D0)
## HBB.genotypeHbAC:scale(varATS_par_µL_D0)
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in abbreviate(rn, minlength = 11): abbreviate used with non-ASCII chars
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate used with non-ASCII chars
## (Intr) HBB.HAC HBB.HAS s(ATS_ HBBHAC
## HBB.gntyHAC -0.509
## HBB.gntyHAS -0.540 0.275
## s(ATS__µL_D -0.095 0.048 0.051
## HBBHAC:(ATS 0.026 0.629 -0.014 -0.278
## HBBHAS:(ATS 0.073 -0.037 -0.192 -0.771 0.214
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.3323, p-value = 0.394
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 2.23 0.630 Inf 1.15 4.32 1 2.847 0.0123
## HbAS / HbAA 5.95 1.580 Inf 3.19 11.08 1 6.708 <.0001
## HbAS / HbAC 2.66 0.879 Inf 1.23 5.77 1 2.967 0.0085
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Adding too many additional variables could be problematic due to small sample/group sizes.
Model assumptions remain problematic without adding the interaction effect.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype +
scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + scale(Age) + Gender +
## Village + Ethnicity + Symptomatic + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 851.4 889.9 -410.7 821.4 81
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1774 0.4212
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.53832 0.28495 -12.417 < 2e-16 ***
## HBB.genotypeHbAC 0.60207 0.20377 2.955 0.00313 **
## HBB.genotypeHbAS 1.91313 0.27670 6.914 4.71e-12 ***
## scale(varATS_par_µL_D0) 0.16761 0.11268 1.487 0.13691
## scale(Age) -0.01076 0.08702 -0.124 0.90162
## Gender1 -0.11956 0.19471 -0.614 0.53916
## Village2 0.01520 0.26069 0.058 0.95352
## Village3 0.15883 0.26569 0.598 0.54998
## Village4 0.49318 0.42390 1.163 0.24465
## Ethnicity2 0.03488 0.27032 0.129 0.89734
## Symptomatic1 -0.10713 0.21216 -0.505 0.61360
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.2872 0.6453 11.292 < 2e-16 ***
## HBB.genotypeHbAC -0.4302 0.8535 -0.504 0.614
## HBB.genotypeHbAS -4.2988 0.7853 -5.474 4.39e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2155, p-value = 0.472
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.83 0.372 Inf 1.13 2.94 1 2.955 0.0088
## HbAS / HbAA 6.77 1.870 Inf 3.54 12.96 1 6.914 <.0001
## HbAS / HbAC 3.71 1.090 Inf 1.87 7.37 1 4.476 <.0001
##
## Results are averaged over the levels of: Gender, Village, Ethnicity, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Variables do not seem to be overly correlated though.
performance::check_collinearity(model)
The model assumptions improve when we add the parasitemia*genotype interaction effect.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) +
scale(Age) + Gender + Village + Ethnicity + Symptomatic +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype * scale(varATS_par_µL_D0) + scale(Age) + Gender +
## Village + Ethnicity + Symptomatic + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 846.1 889.7 -406.0 812.1 79
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1139 0.3375
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.494115 0.254713 -13.718 < 2e-16
## HBB.genotypeHbAC 0.817530 0.211227 3.870 0.000109
## HBB.genotypeHbAS 1.764612 0.254655 6.929 4.23e-12
## scale(varATS_par_µL_D0) -0.106636 0.133963 -0.796 0.426026
## scale(Age) 0.004667 0.071984 0.065 0.948311
## Gender1 -0.124449 0.165801 -0.751 0.452898
## Village2 -0.073977 0.245693 -0.301 0.763341
## Village3 0.130038 0.225411 0.577 0.564011
## Village4 0.522427 0.397570 1.314 0.188829
## Ethnicity2 0.377407 0.244998 1.540 0.123451
## Symptomatic1 -0.098498 0.177813 -0.554 0.579617
## HBB.genotypeHbAC:scale(varATS_par_µL_D0) 1.258240 0.470477 2.674 0.007487
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) 0.423132 0.174221 2.429 0.015153
##
## (Intercept) ***
## HBB.genotypeHbAC ***
## HBB.genotypeHbAS ***
## scale(varATS_par_µL_D0)
## scale(Age)
## Gender1
## Village2
## Village3
## Village4
## Ethnicity2
## Symptomatic1
## HBB.genotypeHbAC:scale(varATS_par_µL_D0) **
## HBB.genotypeHbAS:scale(varATS_par_µL_D0) *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.2953 0.6510 11.206 < 2e-16 ***
## HBB.genotypeHbAC -0.4312 0.8560 -0.504 0.614
## HBB.genotypeHbAS -4.4712 0.7904 -5.657 1.54e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.0441, p-value = 0.84
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
## NOTE: Results may be misleading due to involvement in interactions
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 2.26 0.478 Inf 1.38 3.72 1 3.870 0.0003
## HbAS / HbAA 5.84 1.490 Inf 3.21 10.61 1 6.929 <.0001
## HbAS / HbAC 2.58 0.756 Inf 1.30 5.12 1 3.232 0.0035
##
## Results are averaged over the levels of: Gender, Village, Ethnicity, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)
But even just adding gender and parasitemia, without the interaction effect, also seems to work, even though gender itself is once again not significant.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + scale(varATS_par_µL_D0) + Gender +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + Gender + (1 | Sample)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 842.4 865.5 -412.2 824.4 87
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 0.1952 0.4418
## Number of obs: 96, groups: Sample, 32
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5094 0.1609 -21.817 < 2e-16 ***
## HBB.genotypeHbAC 0.6323 0.1868 3.385 0.000712 ***
## HBB.genotypeHbAS 2.0239 0.2666 7.592 3.16e-14 ***
## scale(varATS_par_µL_D0) 0.1270 0.1015 1.251 0.210813
## Gender1 -0.1352 0.1833 -0.738 0.460685
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.2902 0.6456 11.291 < 2e-16 ***
## HBB.genotypeHbAC -0.4397 0.8535 -0.515 0.606
## HBB.genotypeHbAS -4.3241 0.7851 -5.508 3.63e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2227, p-value = 0.47
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.88 0.352 Inf 1.21 2.92 1 3.385 0.0021
## HbAS / HbAA 7.57 2.020 Inf 4.05 14.14 1 7.592 <.0001
## HbAS / HbAC 4.02 1.120 Inf 2.09 7.74 1 4.982 <.0001
##
## Results are averaged over the levels of: Gender
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
performance::check_collinearity(model)
LRT suggests that the simple model is the better fit and there is no evidence to include the additional factors.
Risk factors / biological relevant factors:
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype +
scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
model_simple <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
Interaction effect:
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) +
scale(Age) + Gender + Village + Ethnicity + Symptomatic +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
Even simpler model containing only the parasitemia interaction, symptomatic or gender factor do not improve model fit:
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype +
Symptomatic +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype +
Gender +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + scale(varATS_par_µL_D0) + Gender +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype * scale(varATS_par_µL_D0) + Gender +
(1 | Sample),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
anova(model, model_simple, test = "LRT")
Since there appears to be some difference between villages, and there are very few samples for some villages, we could try modelling village as a random factor, but this does not lead to a good fitting model.
model <- glmer(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype + (1 | Sample) + (1 | Village),
family = binomial, data = df)
## boundary (singular) fit: see help('isSingular')
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + (1 | Sample) + (1 | Village)
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 1278.0 1290.8 -634.0 1268.0 91
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -12.2206 -0.9947 0.0491 0.7415 15.3930
##
## Random effects:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 3.268e-01 5.717e-01
## Village (Intercept) 1.290e-09 3.592e-05
## Number of obs: 96, groups: Sample, 32; Village, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5841 0.1558 -23.002 < 2e-16 ***
## HBB.genotypeHbAC 0.5962 0.2284 2.610 0.00905 **
## HBB.genotypeHbAS 1.9299 0.2816 6.852 7.26e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) HBB.HAC
## HBB.gntyHAC -0.682
## HBB.gntyHAS -0.553 0.377
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.7871, p-value = 0.092
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.82 0.415 Inf 1.06 3.10 1 2.610 0.0246
## HbAS / HbAA 6.89 1.940 Inf 3.56 13.33 1 6.852 <.0001
## HbAS / HbAC 3.79 1.090 Inf 1.93 7.45 1 4.631 <.0001
##
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
Adding the extra cofactors improves fit now, but the LRT is still not significantly in favour of the more complex model.
model <- glmmTMB(
cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
HBB.genotype +
scale(varATS_par_µL_D0) + Gender + Symptomatic +
# scale(varATS_par_µL_D0) + scale(Age) + Gender + Village + Ethnicity + Symptomatic +
(1 | Sample) + (1 | Village),
dispformula = ~ HBB.genotype,
family = betabinomial(link = "logit"),
data = df
)
summary(model)
## Family: betabinomial ( logit )
## Formula:
## cbind(Gametocyte_counts, Total_parasite_counts - Gametocyte_counts) ~
## HBB.genotype + scale(varATS_par_µL_D0) + Gender + Symptomatic +
## (1 | Sample) + (1 | Village)
## Dispersion:
## ~HBB.genotype
## Data: df
##
## AIC BIC logLik -2*log(L) df.resid
## 845.3 873.5 -411.6 823.3 85
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## Sample (Intercept) 1.841e-01 4.291e-01
## Village (Intercept) 8.536e-10 2.922e-05
## Number of obs: 96, groups: Sample, 32; Village, 4
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.3592 0.2099 -16.000 < 2e-16 ***
## HBB.genotypeHbAC 0.6503 0.1827 3.559 0.000372 ***
## HBB.genotypeHbAS 1.9439 0.2736 7.106 1.2e-12 ***
## scale(varATS_par_µL_D0) 0.1729 0.1088 1.589 0.112103
## Gender1 -0.1743 0.1830 -0.952 0.340938
## Symptomatic1 -0.2047 0.1897 -1.079 0.280530
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Dispersion model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.2853 0.6450 11.295 < 2e-16 ***
## HBB.genotypeHbAC -0.4264 0.8532 -0.500 0.617
## HBB.genotypeHbAS -4.3573 0.7864 -5.541 3.01e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Simulate residuals
sim_res <- simulateResiduals(model, n = 1000)
plot(sim_res)
# Test for overdispersion
testDispersion(sim_res)
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.2048, p-value = 0.516
## alternative hypothesis: two.sided
# Test for zero-inflation
testZeroInflation(sim_res)
##
## DHARMa zero-inflation test via comparison to expected zeros with
## simulation under H0 = fitted model
##
## data: simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
# plot residuals per group
plotResiduals(sim_res, df$HBB.genotype)
# Compute estimated marginal means for the HBB.genotype factor
emm <- emmeans(model, ~ HBB.genotype)
# Perform pairwise comparisons
pairs(emm, adjust="tukey", ratios=T, type="response", reverse=T, infer = c(TRUE, TRUE))
## contrast odds.ratio SE df asymp.LCL asymp.UCL null z.ratio p.value
## HbAC / HbAA 1.92 0.35 Inf 1.25 2.94 1 3.559 0.0011
## HbAS / HbAA 6.99 1.91 Inf 3.68 13.26 1 7.106 <.0001
## HbAS / HbAC 3.65 1.06 Inf 1.85 7.20 1 4.452 <.0001
##
## Results are averaged over the levels of: Gender, Symptomatic
## Confidence level used: 0.95
## Conf-level adjustment: tukey method for comparing a family of 3 estimates
## Intervals are back-transformed from the log odds ratio scale
## P value adjustment: tukey method for comparing a family of 3 estimates
## Tests are performed on the log odds ratio scale
anova(model, model_simple, test = "LRT")
grateful::cite_packages(output = "table", out.dir = here(), cite.tidyverse = TRUE)